% Copyright (C) 2017 Koz Ross <koz.ross@retro-freedom.nz>

% This work is licensed under the Creative Commons Attribution-ShareAlike 4.0 
% International License. To view a copy of this license, visit 
% http://creativecommons.org/licenses/by-sa/4.0/ or send a letter to Creative 
% Commons, PO Box 1866, Mountain View, CA 94042, USA.

\documentclass{beamer}

% NOTE: inputenc omitted — fontspec (below) requires XeLaTeX/LuaLaTeX, which
% read UTF-8 natively; loading inputenc alongside fontspec causes a clash.
\usecolortheme{seahorse}
\usefonttheme{professionalfonts}
\usepackage{fontspec}
\setmainfont{DejaVu Sans}
\setbeamertemplate{navigation symbols}{}
\usepackage{caption}
\usepackage[font=itshape, begintext=``, endtext='']{quoting}
\usepackage{fancyvrb}
\usepackage{color}
\usepackage[noend]{algpseudocode}
\usepackage{amsmath}

\title{Sorting}
\subtitle{Or: some of what Koz spent about two years of his life on}
\titlegraphic{\includegraphics[scale=0.8]{img/logo.pdf}}
\author{Koz Ross}
\date{5th October, 2017}

\begin{document}

\begin{frame}[c]
  \titlepage{}
\end{frame}

\begin{frame}[c]
  \frametitle{Outline}
  \tableofcontents
\end{frame}

\section{Introduction}

\begin{frame}[fragile, c]
  \frametitle{What is sorting and why we care}

  {\em Sorting\/} is putting things in some kind of order.\pause{} This is very
  useful, for a range of reasons:\pause{}
  
  \begin{itemize}
    \item Some tasks are impossible without it (e.g.\ finding the
      median)\pause{}
    \item Some tasks are much easier on sorted data (e.g.\ searching for a
      specific item)\pause{}
    \item Generalizes many other tasks (e.g.\ top-$k$ queries)\pause{}
    \item Sorting is used {\em everywhere\/}\pause{}
  \end{itemize}

  \bigskip

  As a result, sorting is one of {\em the\/} oldest computer science problems we
  have, and has been studied {\em to death}.
\end{frame}

\begin{frame}[fragile,c]
  \frametitle{A little tour}
  \pause{}
  \begin{itemize}
    \item A precise definition of sorting\pause{}
    \item Some known sorting algorithms\pause{}
    \item Inherent limit on the performance of {\em any\/} algorithm\pause{}
    \item Limitations of this result\pause{}
  \end{itemize}

  \bigskip

  Without further ado, let's commence!
\end{frame}

\section{Defining sorting precisely}

\begin{frame}[fragile,c]
  \frametitle{Some basics}

  Let $\mathbb{N} = \{0, 1, 2, \ldots\}$ be the set of {\em natural
  numbers}.\pause{} An {\em $n$-tuple\/} $t = (a_0, a_1, \ldots, a_{n-1})$ is a
  collection of elements with fixed positions. We use $t[i]$ to denote $a_i$ for
  $i \in \{0, 1, \ldots, n - 1\}$.\pause{}

  \begin{definition}
    Let $A,B$ be sets. The {\em Cartesian product\/} of $A$ and $B$ is the set
    
    \[
      A \times B = \{(x, y) \mid x \in A, y \in B\}
    \] 
  \end{definition}\pause{}

  \begin{example}
    Let $A = \{1, 2, 3\}, B = \{a, b\}$. $A \times B$ is
    \[
      \{(1,a), (1,b), (2,a), (2,b), (3,a), (3,b)\}
    \]
  \end{example}\pause{}

  We denote the special case of $A \times A$ as $A^2$.

\end{frame}

\begin{frame}[fragile,c]
  \frametitle{Relations}

  \begin{definition}
    Let $A,B$ be sets. A {\em (binary) relation between $A$ and $B$\/} is $R \subseteq A
    \times B$. For $x \in A, y \in B$, we write $xRy$ to mean $(x,y) \in R$.
  \end{definition}\pause{}

  \bigskip

  We can think of a relation as explicitly spelling out what things in $A$ and
  $B$ are related.
\end{frame}

\begin{frame}[fragile,c]
  \frametitle{Ordering}

  \begin{definition}
    Let $S$ be a set. A {\em (non-strict) ordering on $S$\/} is a binary relation
    $\leqq \subseteq S^2$, such that $\leqq$ has the following
    properties:\pause{}

    \begin{description}[Antisymmetry:]
      \item[Antisymmetry:] For any $a,b \in S$, if $a \leqq b$ and $b \leqq a$,
        then $a = b$;\pause{}
      \item[Transitivity:] For any $a,b,c \in S$, if $a \leqq b$ and $b \leqq
        c$, then $a \leqq c$; and\pause{}
      \item[Totality:] For any $a,b \in S$, $a \leqq b$ or $b \leqq a$.\pause{}
    \end{description}
  \end{definition}

  \bigskip

  We can read $a \leqq b$ as `$a$ does not come after $b$ according to the
  ordering $\leqq$'.\pause{} We can have multiple orderings on various sets.
\end{frame}

\begin{frame}[fragile,c]
  \frametitle{Ordering examples}

  Consider $\mathbb{N}$. We can see that $\leq$ is an ordering on
  $\mathbb{N}$:\pause{}

  \begin{description}[Antisymmetry:]
    \item[Antisymmetry:] If we have two natural numbers $x,y$ such that $x \leq
      y$ and $y \leq x$, they must be equal;\pause{}
    \item[Transitivity:] We can `chain together' $\leq$ in exactly this
      way;\pause{}
    \item[Totality:] Any two natural numbers can be compared with
      $\leq$.\pause{}
  \end{description}

  We can also do something similar with $\geq$. If we look at other sets, we get
  many other orderings (e.g.\ lex and reverse lex for strings).
\end{frame}

\begin{frame}[fragile,c]
  \frametitle{Defining sorting}
  \pause{}
  \begin{definition}
    Let $S$ be a set, $t$ be an $n$-tuple of elements of $S$, and $\leqq$ be an
    ordering on $S$. Given $t,\leqq$ as input, the {\em sorting problem\/} requires
    us to return an $n$-tuple $t^{\prime}$ such that:\pause{}
    
    \begin{itemize}
      \item Every element of $t$ is an element of $t^{\prime}$; and\pause{}
      \item For any $i, j \in \{0, 1, \ldots, n-1\}$, if $i \leq j$ then
        $t^{\prime}[i] \leqq t^{\prime}[j]$.\pause{}
    \end{itemize}
  \end{definition}

  \bigskip

  Put simply, the sorting problem requires us to put $t$ `in order' according to
  $\leqq$.
\end{frame}

\section{Some sorting algorithms}

\begin{frame}[fragile,c]
  \frametitle{Shifting to algorithms}

  Since we're dealing with algorithms, we have to be precise with our
  assumptions. We also want to be as general as possible.\pause{}

  \begin{definition}
    A {\em comparison sort\/} is an algorithm for the sorting problem that does
    not make any additional assumptions about the input tuple, aside from what
    is required by the sorting problem.
  \end{definition}\pause{}

  \bigskip

  We can view this in several ways:\pause{}

  \begin{itemize}
    \item Most general kind of sort (assumes only what it must)\pause{}
    \item Most `difficult' sort (no additional `hacks' we can lean on based on
      the data)\pause{}
    \item Closest to a `pure' view of how hard the sorting problem is (no `extra
      baggage' to confuse analysis)
  \end{itemize}
\end{frame}

\begin{frame}[fragile,c]
  \frametitle{Our approach to analysis}
 
  We'll take the `default' for algorithm analysis:\pause{}
  
  \bigskip

  \begin{description}[Asymptotic:]
    \item[RAM model:]
      \begin{itemize}
        \item Serial processor (can only do one thing at a time)\pause{}
        \item Non-hierarchical, addressable memory\pause{}
        \item Primitive operations take constant time\pause{}
      \end{itemize}
    \item[Asymptotic:] 
      \begin{itemize}
        \item Based on the size of the input\pause{}
        \item Care about the {\em growth rate\/} of the time required\pause{}
      \end{itemize}
    \item[Worst-case:]
      \begin{itemize}
        \item If we'd have different results for same-size inputs, take the
          worst one\pause{}
        \item All input tuple elements are unique\pause{}
        \item Tuple elements are `random' (i.e.\ no sorted sub-sequences)
      \end{itemize}
  \end{description}
\end{frame}

\begin{frame}[fragile,c]
  \frametitle{Algorithms we know}
  \pause{}

  \begin{description}[`Fast' ($O(n \log(n))$):]
    \item[`Slow' ($O(n^2)$):] Bubble sort, insertion sort, selection sort,
      etc.\pause{}
    \item[`Fast' ($O(n \log(n))$):] Mergesort, quicksort, introsort, timsort,
      etc.\pause{}
  \end{description}

  \bigskip

  Can we do better than this? Is there {\em any\/} algorithm that can beat the
  `fast' ones?\pause{} \alert{No.}
\end{frame}

\section{Limits on performance}

\begin{frame}[fragile, c]
  \begin{center}
    \includegraphics[scale=0.5]{img/bigger-preliminaries.pdf}
  \end{center}
\end{frame}

\begin{frame}[fragile,c]
  \frametitle{Factorials and permutations}

  \begin{definition}
    Let $n \in \mathbb{N}$. The {\em factorial of $n$\/} is
    \[
      n! = \begin{cases}
        1 & \text{if } n = 0\text{; and}\\
        n \cdot (n - 1)! & \text{otherwise}\\
                   \end{cases}
    \]
    Alternatively, 
    \[
      n! = \prod_{i=1}^{n} i = 1 \times 2 \times \cdots \times n
    \]
  \end{definition}\pause{}

  \begin{definition}
    Let $S$ be a set of $n$ elements. A {\em permutation of $S$\/} is an $n$-tuple
    of unique elements of $S$.
  \end{definition}\pause{}

  \begin{example}
    Two possible permutations of $S = \{1, 2, 3\}$ are $(1,3,2), (2, 3, 1)$.
  \end{example}
\end{frame}

\begin{frame}[fragile,c]
  \frametitle{Relating factorials and permutations}
  \begin{lemma}
    Let $S$ be a set of $n$ elements. There are $n!$ possible permutations of
    $S$.
  \end{lemma}\pause{}

  \begin{proof}
    We use induction on $n$.\pause{} When $n = 0$, we
    observe that the only permutation is $()$. As $0! =
    1$, the lemma holds for $n = 0$.\pause{}
    
    \medskip

    When $n > 0$, suppose that the lemma holds for some $k \geq 0$. Thus, there 
    are $k!$ permutations of any $k$-element set $S$.\pause{} Without loss of 
    generality, we observe that any
    $(k+1)$-element set $S^{\prime}$ is equal to some $k$-element set $S$ with an
    additional element $u \not\in S$.\pause{} Thus, to convert a permutation of
    $S$ into a permutation $p$ of $S^{\prime}$, we need to `insert' $u$ into 
    $p$.\pause{} As
    there are $k+1$ possible positions to insert into for each permutation of
    $S$, the number of
    possible permutations of $S^{\prime}$ is thus $k!(k+1) = (k+1)!$. Thus, the 
    lemma holds for all $n$.
  \end{proof}
\end{frame}

\begin{frame}[fragile,c]
  \frametitle{Why this matters}
  \pause{}
  Given our assumptions (worst-case), the sorting problem basically involves
  finding a specific permutation (the one where everything is in the right
  place).\pause{} Thus, the amount of work {\em any\/} comparison sort will have
  to do will be based on $n!$ somehow.\pause{}

  \bigskip

  The factorial function is a gigantic pain to analyze. Let's make our lives
  simpler:\pause{}

  \begin{definition}[Stirling approximation]
    $\log_2(n!)$ is $O(n\log(n))$ and $n\log(n)$ is $O(\log_2(n!))$.
  \end{definition}\pause{}

  \bigskip
  Essentially, the Stirling approximation tells us that $\log_2(n!)$ and $n
  \log(n)$ grow at the same rate asymptotically.\pause{} We say $\log_2(n!)$ is
  $\Theta(n\log(n))$ in such a case.
\end{frame}

\begin{frame}[fragile,c]
  \frametitle{The proof, at last}
  \begin{theorem}
    Any comparison sort must perform at least $\Theta(n\log(n))$ comparisons
    between elements of the input.
  \end{theorem}\pause{}

  \begin{proof}
    In order to do its work, a comparison sort must find one specific
    permutation out of $n!$ possibilities.\pause{} Each comparison we perform
    can eliminate at most half of the possible permutations at that
    step.\pause{} Thus, any comparison sort must perform at least $\log_2(n!)$
    comparisons to ensure correctness.\pause{} By the Stirling approximation,
    this is $\Theta(n\log(n))$.
  \end{proof}\pause{}

  \begin{corollary}
    Under our assumptions, no comparison sort with a time complexity better than
    $\Theta(n\log(n))$ (and thus, $O(n\log(n))$) can exist.
  \end{corollary}
\end{frame}

\begin{frame}[fragile,c]
  \frametitle{Is there nothing we can do?}
  \pause{}
  This is definitely a strong result that transcends any particular algorithm.
  However:\pause{}

  \begin{itemize}
    \item There are {\em practical\/} improvements we can make:\pause{}
      \begin{itemize}
        \item Not all data will be this bad!\pause{}
        \item Not all $O(n\log(n))$ algorithms are born equal (consider timsort
          versus mergesort)\pause{}
      \end{itemize}
    \item We usually know more about our data (numbers, limited number of unique
      items, strings, etc)\pause{}
    \item RAM is not the most accurate model of modern computers:\pause{}
      \begin{itemize}
        \item Modern machines are parallel --- lots of different optimality
          points there!\pause{}
        \item Modern memory is {\em very\/} hierarchical --- also lots of
          optimality points to consider\pause{}
      \end{itemize}
    \item Data is not static or centralized anymore:\pause{}
      \begin{itemize}
        \item Interest in partial or online sorts\pause{}
        \item Fully-dynamic sorts (data can change in arbitrary ways)\pause{}
        \item Distributed computing
      \end{itemize}
  \end{itemize}
\end{frame}

\begin{frame}[fragile,c]
  \frametitle{The most important corollary}
  \begin{corollary}
    Know your data, your tools and your problem, and you can work (or discover)
    performance wonders.
  \end{corollary}\pause{}

  Still work to be done in this area --- for many years to come!
\end{frame}

\section{Questions}

\begin{frame}[fragile, c]
  \frametitle{Questions?}
  \begin{center}
  \includegraphics[scale=0.27]{img/questions.pdf}
\end{center}
\end{frame}

\end{document}

